# Remove every object listed by ls() from the current environment before the analysis starts.
rm(list = ls())
library("knitr") # table formatting for the outputs
library("ggplot2") # for "nice-looking" plots
library("FactoMineR") # to run the PCA
library("factoextra") # to extract and visualise the results produced by FactoMineR
library("corrplot") # to draw a representation of the correlations
library("plotly")
Présentation des données
# Read the mineral-water table from the remote CSV file: "," is the decimal
# mark and the first column of the file supplies the row names.
EauxMinerales <- read.csv2(
  file = "https://raw.githubusercontent.com/GasparMassiot/GMN3A/master/data/EauxMinerales.csv",
  row.names = 1,
  dec = ","
)
# Display the first rows as a formatted table.
knitr::kable(x = head(EauxMinerales))
| Alet |
63.0 |
23.00 |
1.8 |
13.0 |
300.0 |
14.00 |
11.0 |
2.0 |
NG |
| Arcens |
88.0 |
66.00 |
7.0 |
290.0 |
1280.0 |
3.77 |
52.0 |
0.0 |
G |
| Ariegeoise |
3.7 |
0.58 |
0.5 |
1.8 |
6.7 |
8.00 |
0.8 |
1.0 |
NG |
| Arvie |
183.0 |
93.00 |
144.0 |
616.0 |
2306.0 |
50.00 |
305.0 |
0.0 |
G |
| Auvergne |
4.0 |
1.80 |
0.6 |
3.6 |
24.4 |
1.50 |
0.9 |
0.5 |
NG |
| Beaumont |
10.8 |
4.20 |
1.3 |
20.5 |
82.8 |
8.10 |
5.2 |
0.8 |
NG |
# Number of rows, then number of columns, of the data set.
c(nrow(EauxMinerales), ncol(EauxMinerales))
## [1] 35 9
# Basic descriptive statistics for every column
summary(object = EauxMinerales)
## Ca Mg K Na Si SO4 Cl NO3 Gaz
## Min. : 1.2 Min. : 0.20 Min. : 0.40 Min. : 1.4 Min. : 1.0 Min. : 1.5 Min. : 0.80 Min. : 0.000 Length:35
## 1st Qu.: 55.5 1st Qu.: 5.80 1st Qu.: 1.00 1st Qu.: 7.0 1st Qu.: 178.1 1st Qu.: 12.0 1st Qu.: 5.65 1st Qu.: 0.125 Class :character
## Median : 78.0 Median : 12.00 Median : 3.00 Median : 13.5 Median : 250.0 Median : 25.0 Median : 18.45 Median : 1.500 Mode :character
## Mean :122.0 Mean : 28.06 Mean : 17.74 Mean : 184.1 Mean : 624.6 Mean : 198.3 Mean : 48.50 Mean : 5.747
## 3rd Qu.:160.5 3rd Qu.: 42.00 3rd Qu.: 6.35 3rd Qu.: 152.6 3rd Qu.: 401.5 3rd Qu.: 146.8 3rd Qu.: 39.50 3rd Qu.: 3.150
## Max. :555.0 Max. :110.00 Max. :144.00 Max. :1744.0 Max. :4263.0 Max. :1479.0 Max. :329.00 Max. :82.000
## NA's :1 NA's :1
# Correlation matrix of all columns except the last one.
# Row 33 is left out of the computation.
# NOTE(review): presumably row 33 is the observation holding the NA values
# reported by summary() -- confirm against the data.
# seq_len() is used for the column range because 1:(n - 1) would silently
# produce c(1, 0) if the table ever had a single column.
correlation <- cor(EauxMinerales[-33, seq_len(ncol(EauxMinerales) - 1L)])
# Print the matrix as a table, rounded to two decimals.
kable(correlation, digits = 2)
| Ca |
1.00 |
0.75 |
0.04 |
-0.08 |
0.08 |
0.83 |
-0.05 |
-0.07 |
| Mg |
0.75 |
1.00 |
0.37 |
0.11 |
0.17 |
0.68 |
0.10 |
0.19 |
| K |
0.04 |
0.37 |
1.00 |
0.83 |
0.78 |
-0.01 |
0.76 |
0.24 |
| Na |
-0.08 |
0.11 |
0.83 |
1.00 |
0.87 |
0.00 |
0.77 |
0.22 |
| Si |
0.08 |
0.17 |
0.78 |
0.87 |
1.00 |
-0.09 |
0.75 |
-0.20 |
| SO4 |
0.83 |
0.68 |
-0.01 |
0.00 |
-0.09 |
1.00 |
-0.10 |
0.24 |
| Cl |
-0.05 |
0.10 |
0.76 |
0.77 |
0.75 |
-0.10 |
1.00 |
-0.07 |
| NO3 |
-0.07 |
0.19 |
0.24 |
0.22 |
-0.20 |
0.24 |
-0.07 |
1.00 |
# Graphical display of the correlation matrix
corrplot(corr = correlation)

Analyse en composantes principales
# PCA on the data with row 33 left out, variables scaled (scale.unit = TRUE),
# and no automatic plotting (graph = FALSE).
# Column 9 is declared as a supplementary qualitative variable: supplementary
# variables are included on the plots but are not taken into account when
# the PCA is computed.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
resPCA <- PCA(
  EauxMinerales[-33, ],
  scale.unit = TRUE,
  quali.sup = 9,
  graph = FALSE
)
Analyse de l’inertie des axes factoriels
# Eigenvalue, percentage of variance and cumulative percentage per dimension
get_eigenvalue(X = resPCA)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.456262127 43.20327659 43.20328
## Dim.2 2.525381046 31.56726307 74.77054
## Dim.3 1.177107787 14.71384734 89.48439
## Dim.4 0.385956448 4.82445559 94.30884
## Dim.5 0.270901788 3.38627235 97.69511
## Dim.6 0.103206116 1.29007645 98.98519
## Dim.7 0.076987286 0.96234108 99.94753
## Dim.8 0.004197402 0.05246753 100.00000
# Eigenvalue plot with a label on each bar, plus a red horizontal reference
# line drawn at y = 10.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
fviz_eig(resPCA, addlabels = TRUE) +
  geom_hline(yintercept = 10, color = "red")

Étude des variables
# Variable-level results extracted from the PCA object
var <- get_pca_var(resPCA)

# Variables on axes 1 and 2, drawn as labelled arrows coloured by cos2,
# then converted to a plotly figure.
ggplotly(
  fviz_pca_var(
    X = resPCA,
    axes = c(1L, 2L),
    geom = c("text", "arrow"),
    col.var = "cos2"
  ) +
    theme_classic()
)

# Same plot for axes 3 and 4, kept as a plain ggplot.
fviz_pca_var(
  X = resPCA,
  axes = c(3L, 4L),
  geom = c("text", "arrow"),
  col.var = "cos2"
) +
  theme_classic()

Étude des individus
# Individual-level results extracted from the PCA object
ind <- get_pca_ind(resPCA)

# Per-individual sum of the cos2 values over the first two dimensions.
# The plot call below is given the string "cos2", not this vector.
cos2 <- rowSums(x = resPCA$ind$cos2[, c(1L, 2L)])

# Individuals on axes 1 and 2, coloured by cos2 along a three-colour gradient.
fviz_pca_ind(
  X = resPCA,
  axes = c(1L, 2L),
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)

# Per-individual sum of the cos2 values over dimensions 3 and 4.
# The plot call below is given the string "cos2", not this vector.
cos2 <- rowSums(x = resPCA$ind$cos2[, c(3L, 4L)])

# Individuals on axes 3 and 4, coloured by cos2 along a three-colour gradient.
fviz_pca_ind(
  X = resPCA,
  axes = c(3L, 4L),
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

Biplot
# Biplot showing individuals and variables together; repel = TRUE is passed
# to keep labels from overlapping.
fviz_pca_biplot(
  X = resPCA,
  col.ind = "#696969", # colour of the individuals
  col.var = "#2E9FDF", # colour of the variables
  repel = TRUE
)
